{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "9a3c7284",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import scipy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "63f9a64f",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "#tumor stage and treatment_type\n",
    "tcga_clinical=pd.read_csv('tcga-skcm/clinical.csv')\n",
    "tcga_clinical_sub=tcga_clinical[['case_submitter_id','ajcc_pathologic_m','ajcc_pathologic_t']]\n",
    "tcga_clinical_sub=tcga_clinical_sub.rename(columns = {\"case_submitter_id\":\"Patients\"})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "99f87fbc",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "M0     836\n",
       "'--     56\n",
       "M1c     20\n",
       "M1b     10\n",
       "M1      10\n",
       "M1a      8\n",
       "Name: ajcc_pathologic_m, dtype: int64"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tcga_clinical_sub['ajcc_pathologic_m'].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "0e185cb7",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Patients</th>\n",
       "      <th>ajcc_pathologic_m</th>\n",
       "      <th>ajcc_pathologic_t</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td>TCGA-GN-A263</td>\n",
       "      <td>M1c</td>\n",
       "      <td>T4b</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31</th>\n",
       "      <td>TCGA-GN-A263</td>\n",
       "      <td>M1c</td>\n",
       "      <td>T4b</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>52</th>\n",
       "      <td>TCGA-WE-A8K5</td>\n",
       "      <td>M1c</td>\n",
       "      <td>T2a</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>53</th>\n",
       "      <td>TCGA-WE-A8K5</td>\n",
       "      <td>M1c</td>\n",
       "      <td>T2a</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>136</th>\n",
       "      <td>TCGA-FS-A1Z3</td>\n",
       "      <td>M1</td>\n",
       "      <td>TX</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>137</th>\n",
       "      <td>TCGA-FS-A1Z3</td>\n",
       "      <td>M1</td>\n",
       "      <td>TX</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>152</th>\n",
       "      <td>TCGA-EB-A5FP</td>\n",
       "      <td>M1b</td>\n",
       "      <td>T4b</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>153</th>\n",
       "      <td>TCGA-EB-A5FP</td>\n",
       "      <td>M1b</td>\n",
       "      <td>T4b</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>188</th>\n",
       "      <td>TCGA-RP-A695</td>\n",
       "      <td>M1c</td>\n",
       "      <td>TX</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>189</th>\n",
       "      <td>TCGA-RP-A695</td>\n",
       "      <td>M1c</td>\n",
       "      <td>TX</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>198</th>\n",
       "      <td>TCGA-RP-A693</td>\n",
       "      <td>M1c</td>\n",
       "      <td>TX</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>199</th>\n",
       "      <td>TCGA-RP-A693</td>\n",
       "      <td>M1c</td>\n",
       "      <td>TX</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>222</th>\n",
       "      <td>TCGA-ER-A19A</td>\n",
       "      <td>M1</td>\n",
       "      <td>TX</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>223</th>\n",
       "      <td>TCGA-ER-A19A</td>\n",
       "      <td>M1</td>\n",
       "      <td>TX</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>228</th>\n",
       "      <td>TCGA-FW-A3I3</td>\n",
       "      <td>M1</td>\n",
       "      <td>'--</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>229</th>\n",
       "      <td>TCGA-FW-A3I3</td>\n",
       "      <td>M1</td>\n",
       "      <td>'--</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>322</th>\n",
       "      <td>TCGA-WE-A8ZT</td>\n",
       "      <td>M1b</td>\n",
       "      <td>T3b</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>323</th>\n",
       "      <td>TCGA-WE-A8ZT</td>\n",
       "      <td>M1b</td>\n",
       "      <td>T3b</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>340</th>\n",
       "      <td>TCGA-BF-AAP0</td>\n",
       "      <td>M1</td>\n",
       "      <td>T4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>341</th>\n",
       "      <td>TCGA-BF-AAP0</td>\n",
       "      <td>M1</td>\n",
       "      <td>T4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>372</th>\n",
       "      <td>TCGA-DA-A95Z</td>\n",
       "      <td>M1a</td>\n",
       "      <td>TX</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>373</th>\n",
       "      <td>TCGA-DA-A95Z</td>\n",
       "      <td>M1a</td>\n",
       "      <td>TX</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>378</th>\n",
       "      <td>TCGA-DA-A1I0</td>\n",
       "      <td>M1a</td>\n",
       "      <td>T4b</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>379</th>\n",
       "      <td>TCGA-DA-A1I0</td>\n",
       "      <td>M1a</td>\n",
       "      <td>T4b</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>454</th>\n",
       "      <td>TCGA-D3-A1Q8</td>\n",
       "      <td>M1b</td>\n",
       "      <td>T0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>455</th>\n",
       "      <td>TCGA-D3-A1Q8</td>\n",
       "      <td>M1b</td>\n",
       "      <td>T0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>498</th>\n",
       "      <td>TCGA-ER-A3PL</td>\n",
       "      <td>M1a</td>\n",
       "      <td>T3b</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>499</th>\n",
       "      <td>TCGA-ER-A3PL</td>\n",
       "      <td>M1a</td>\n",
       "      <td>T3b</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>582</th>\n",
       "      <td>TCGA-FS-A1ZH</td>\n",
       "      <td>M1c</td>\n",
       "      <td>T3b</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>583</th>\n",
       "      <td>TCGA-FS-A1ZH</td>\n",
       "      <td>M1c</td>\n",
       "      <td>T3b</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>608</th>\n",
       "      <td>TCGA-EE-A20I</td>\n",
       "      <td>M1c</td>\n",
       "      <td>TX</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>609</th>\n",
       "      <td>TCGA-EE-A20I</td>\n",
       "      <td>M1c</td>\n",
       "      <td>TX</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>662</th>\n",
       "      <td>TCGA-D3-A51N</td>\n",
       "      <td>M1c</td>\n",
       "      <td>T0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>663</th>\n",
       "      <td>TCGA-D3-A51N</td>\n",
       "      <td>M1c</td>\n",
       "      <td>T0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>666</th>\n",
       "      <td>TCGA-ER-A19T</td>\n",
       "      <td>M1a</td>\n",
       "      <td>T4a</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>667</th>\n",
       "      <td>TCGA-ER-A19T</td>\n",
       "      <td>M1a</td>\n",
       "      <td>T4a</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>688</th>\n",
       "      <td>TCGA-D3-A5GS</td>\n",
       "      <td>M1c</td>\n",
       "      <td>T1b</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>689</th>\n",
       "      <td>TCGA-D3-A5GS</td>\n",
       "      <td>M1c</td>\n",
       "      <td>T1b</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>736</th>\n",
       "      <td>TCGA-ER-A19J</td>\n",
       "      <td>M1</td>\n",
       "      <td>TX</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>737</th>\n",
       "      <td>TCGA-ER-A19J</td>\n",
       "      <td>M1</td>\n",
       "      <td>TX</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>738</th>\n",
       "      <td>TCGA-RP-A694</td>\n",
       "      <td>M1c</td>\n",
       "      <td>TX</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>739</th>\n",
       "      <td>TCGA-RP-A694</td>\n",
       "      <td>M1c</td>\n",
       "      <td>TX</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>794</th>\n",
       "      <td>TCGA-D3-A8GE</td>\n",
       "      <td>M1b</td>\n",
       "      <td>TX</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>795</th>\n",
       "      <td>TCGA-D3-A8GE</td>\n",
       "      <td>M1b</td>\n",
       "      <td>TX</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>814</th>\n",
       "      <td>TCGA-DA-A1I5</td>\n",
       "      <td>M1c</td>\n",
       "      <td>T1a</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>815</th>\n",
       "      <td>TCGA-DA-A1I5</td>\n",
       "      <td>M1c</td>\n",
       "      <td>T1a</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>938</th>\n",
       "      <td>TCGA-D9-A148</td>\n",
       "      <td>M1b</td>\n",
       "      <td>TX</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>939</th>\n",
       "      <td>TCGA-D9-A148</td>\n",
       "      <td>M1b</td>\n",
       "      <td>TX</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         Patients ajcc_pathologic_m ajcc_pathologic_t\n",
       "30   TCGA-GN-A263               M1c               T4b\n",
       "31   TCGA-GN-A263               M1c               T4b\n",
       "52   TCGA-WE-A8K5               M1c               T2a\n",
       "53   TCGA-WE-A8K5               M1c               T2a\n",
       "136  TCGA-FS-A1Z3                M1                TX\n",
       "137  TCGA-FS-A1Z3                M1                TX\n",
       "152  TCGA-EB-A5FP               M1b               T4b\n",
       "153  TCGA-EB-A5FP               M1b               T4b\n",
       "188  TCGA-RP-A695               M1c                TX\n",
       "189  TCGA-RP-A695               M1c                TX\n",
       "198  TCGA-RP-A693               M1c                TX\n",
       "199  TCGA-RP-A693               M1c                TX\n",
       "222  TCGA-ER-A19A                M1                TX\n",
       "223  TCGA-ER-A19A                M1                TX\n",
       "228  TCGA-FW-A3I3                M1               '--\n",
       "229  TCGA-FW-A3I3                M1               '--\n",
       "322  TCGA-WE-A8ZT               M1b               T3b\n",
       "323  TCGA-WE-A8ZT               M1b               T3b\n",
       "340  TCGA-BF-AAP0                M1                T4\n",
       "341  TCGA-BF-AAP0                M1                T4\n",
       "372  TCGA-DA-A95Z               M1a                TX\n",
       "373  TCGA-DA-A95Z               M1a                TX\n",
       "378  TCGA-DA-A1I0               M1a               T4b\n",
       "379  TCGA-DA-A1I0               M1a               T4b\n",
       "454  TCGA-D3-A1Q8               M1b                T0\n",
       "455  TCGA-D3-A1Q8               M1b                T0\n",
       "498  TCGA-ER-A3PL               M1a               T3b\n",
       "499  TCGA-ER-A3PL               M1a               T3b\n",
       "582  TCGA-FS-A1ZH               M1c               T3b\n",
       "583  TCGA-FS-A1ZH               M1c               T3b\n",
       "608  TCGA-EE-A20I               M1c                TX\n",
       "609  TCGA-EE-A20I               M1c                TX\n",
       "662  TCGA-D3-A51N               M1c                T0\n",
       "663  TCGA-D3-A51N               M1c                T0\n",
       "666  TCGA-ER-A19T               M1a               T4a\n",
       "667  TCGA-ER-A19T               M1a               T4a\n",
       "688  TCGA-D3-A5GS               M1c               T1b\n",
       "689  TCGA-D3-A5GS               M1c               T1b\n",
       "736  TCGA-ER-A19J                M1                TX\n",
       "737  TCGA-ER-A19J                M1                TX\n",
       "738  TCGA-RP-A694               M1c                TX\n",
       "739  TCGA-RP-A694               M1c                TX\n",
       "794  TCGA-D3-A8GE               M1b                TX\n",
       "795  TCGA-D3-A8GE               M1b                TX\n",
       "814  TCGA-DA-A1I5               M1c               T1a\n",
       "815  TCGA-DA-A1I5               M1c               T1a\n",
       "938  TCGA-D9-A148               M1b                TX\n",
       "939  TCGA-D9-A148               M1b                TX"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tcga_clinical_sub[tcga_clinical_sub['ajcc_pathologic_m'].isin(['M1c','M1','M1b','M1a'])]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "348bb6cb",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "tcga_clinical_sub=tcga_clinical_sub.drop_duplicates(subset=['Patients'], keep='first')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "35ba1b55",
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "#ssGSEA of MHC-I and MHC-II\n",
    "mhc=pd.read_csv('tcga-skcm/TCGA-SKCM-ssGSEA.csv')\n",
    "for i in range(len(mhc)):\n",
    "    patient_id=mhc.iloc[i,0]\n",
    "    patient_id_l=patient_id.split('-')\n",
    "    patient_id_new=patient_id_l[0]+'-'+patient_id_l[1]+'-'+patient_id_l[2]\n",
    "    mhc.iloc[i,0]=patient_id_new"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "c74d3677",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Patients</th>\n",
       "      <th>MHC-II HLA Geneset</th>\n",
       "      <th>MHC-I HLA and APM Geneset</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>TCGA-3N-A9WB</td>\n",
       "      <td>-10662.623480</td>\n",
       "      <td>-1602.267569</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>TCGA-3N-A9WC</td>\n",
       "      <td>10449.358510</td>\n",
       "      <td>10360.316490</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>TCGA-3N-A9WD</td>\n",
       "      <td>5523.701612</td>\n",
       "      <td>2741.637894</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       Patients  MHC-II HLA Geneset  MHC-I HLA and APM Geneset\n",
       "0  TCGA-3N-A9WB       -10662.623480               -1602.267569\n",
       "1  TCGA-3N-A9WC        10449.358510               10360.316490\n",
       "2  TCGA-3N-A9WD         5523.701612                2741.637894"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "mhc.head(3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "9a2ebaa7",
   "metadata": {},
   "outputs": [],
   "source": [
    "#abstract the pd-1 expression \n",
    "#tcga_genes=pd.read_csv('tcga-skcm/RNA_TCGA_SKCM.csv')\n",
    "#tcga_pd=tcga_genes[tcga_genes['GeneSymbol']=='PDCD1'].T\n",
    "#tcga_pd.to_csv('tcga-skcm/PDCD1.csv')\n",
    "tcga_pd=pd.read_csv('tcga-skcm/PDCD1.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "a6a47da6",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Patients</th>\n",
       "      <th>T cells CD8</th>\n",
       "      <th>T cells regulatory (Tregs)</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>TCGA-3N-A9WB</td>\n",
       "      <td>0.026118</td>\n",
       "      <td>0.006082</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>TCGA-3N-A9WC</td>\n",
       "      <td>0.398264</td>\n",
       "      <td>0.054761</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>TCGA-3N-A9WD</td>\n",
       "      <td>0.252902</td>\n",
       "      <td>0.074125</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       Patients  T cells CD8  T cells regulatory (Tregs)\n",
       "0  TCGA-3N-A9WB     0.026118                    0.006082\n",
       "1  TCGA-3N-A9WC     0.398264                    0.054761\n",
       "2  TCGA-3N-A9WD     0.252902                    0.074125"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#from the cibersort analysis import the information of treg\n",
    "tcga_treg=pd.read_csv('tcga-skcm/TCGA_SKCM_CIBERSORT.csv')\n",
    "tcga_treg_sub=tcga_treg[['sample_name','T cells CD8','T cells regulatory (Tregs)']]\n",
    "tcga_treg_sub=tcga_treg_sub.rename(columns = {\"sample_name\":\"Patients\"})\n",
    "tcga_treg_sub.head(3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "2a5a35aa",
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Patients</th>\n",
       "      <th>T.CD8</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>TCGA-EE-A2GE</td>\n",
       "      <td>1.509375</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>TCGA-ER-A193</td>\n",
       "      <td>1.019785</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>TCGA-EB-A3Y6</td>\n",
       "      <td>-0.263430</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       Patients     T.CD8\n",
       "0  TCGA-EE-A2GE  1.509375\n",
       "1  TCGA-ER-A193  1.019785\n",
       "2  TCGA-EB-A3Y6 -0.263430"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#from the time signature import the informaiton of cd8+T \n",
    "tcga_time=pd.read_csv('tcga-skcm/tcga_time.csv')\n",
    "tcga_time_sub=tcga_time[['x','T.CD8']]\n",
    "tcga_time_sub=tcga_time_sub.rename(columns = {\"x\":\"Patients\"})\n",
    "tcga_time_sub.head(3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "271378ab",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAh8AAAGdCAYAAACyzRGfAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAA9hAAAPYQGoP6dpAAAgLUlEQVR4nO3dfVSb9f3/8VcsawQb4mrbpDmlJWq8xTptPayoA2/IGdaedmzdFOfBVXdaad1Yz4ZFthk9a6ioDCcTrWcH8Xiw/mPVs84KbkrdYT2j1c4edN5MWphthjeYYMvg2F6/P/w1XzPqTSB80os+H+dc55jPdSW8yVF5ng8hcViWZQkAAMCQE9I9AAAAOL4QHwAAwCjiAwAAGEV8AAAAo4gPAABgFPEBAACMIj4AAIBRxAcAADAqI90D/K/Dhw9r3759crlccjgc6R4HAAB8BZZlaXBwUD6fTyec8MV7G8dcfOzbt085OTnpHgMAAIxBX1+f5syZ84XXHHPx4XK5JH06fHZ2dpqnAQAAX0UsFlNOTk785/gXOebi48ivWrKzs4kPAABs5qu8ZIIXnAIAAKOIDwAAYBTxAQAAjCI+AACAUcQHAAAwivgAAABGER8AAMAo4gMAABhFfAAAAKOIDwAAYBTxAQAAjCI+AACAUcQHAAAwivgAAABGZaR7AADHl9x1W9I9wpjs2bA43SMAkwY7HwAAwCjiAwAAGEV8AAAAo4gPAABgFPEBAACMIj4AAIBRxAcAADCK+AAAAEYlFR+ffPKJfvnLX8rv9yszM1Onnnqq7rzzTh0+fDh+jWVZCoVC8vl8yszMVFFRkbq7u1M+OAAAsKek4uOuu+7Sgw8+qMbGRr3++uuqq6vT3Xffrfvvvz9+TV1dnerr69XY2Kiuri55vV4VFxdrcHAw5cMDAAD7SSo+/va3v2np0qVavHixcnNz9b3vfU/BYFA7duyQ9OmuR0NDg2pqalRaWqq8vDy1tLTo4MGDam1tnZBvAAAA2EtS8XHJJZfoz3/+s958801J0j/+8Q/99a9/1VVXXSVJ6unpUSQSUTAYjN/H6XSqsLBQnZ2dR33M4eFhxWKxhAMAAExeSX2w3K233qpoNKqzzjpLU6ZM0aFDh7R+/Xpde+21kqRIJCJJ8ng8CffzeDzau3fvUR+ztrZWd9xxx1hmBwAANpTUzscTTzyhxx57TK2trXr55ZfV0tKie+65Ry0tLQnXORyOhNuWZY1aO6K6ulrRaDR+9PX1JfktAAAAO0lq5+MXv/iF1q1bp2uuuUaSdN5552nv3r2qra1VeXm5vF6vpE93QGbPnh2/X39//6jdkCOcTqecTudY5wcAADaT1M7HwYMHdcIJiXeZMmVK/E9t/X6/vF6v2tvb4+dHRkbU0dGhgoKCFIwLAADsLqmdjyVLlmj9+vWaO3euzj33XL3yyiuqr6/XihUrJH3665bKykqFw2EFAgEFAgGFw2FlZWWprKxsQr4BAABgL0nFx/33369f/epXqqioUH9/v3w+n1auXKlf//rX8Wuqqqo0NDSkiooKDQwMKD8/X21tbXK5XCkfHgAA2I/Dsiwr3UN8ViwWk9vtVjQaVXZ2drrHAZBiueu2pHuEMdmzYXG6RwCOacn8/OazXQAAgFHEBwAAMIr4AAAARhEfAADAKOIDAAAYRXwAAACjiA8AAGAU8QEAAIwiPgAAgFHEBwAAMCqpz3ZBetjx7ah5K2oAwOdh5wMAABhFfAAAAKOIDwAAYBTxAQAAjCI+AACAUcQHAAAwivgAAABGER8AAMAo4gMAABhFfAAAAKOIDwAAYBTxAQAAjCI+AACAUcQHAAAwivgAAABGER8AAMAo4gMAABhFfAAAAKOIDwAAYBTxAQAAjEoqPnJzc+VwOEYdq1evliRZlqVQKCSfz6fMzEwVFRWpu7t7QgYHAAD2lFR8dHV1af/+/fGjvb1dkrR8+XJJUl1dnerr69XY2Kiuri55vV4VFxdrcHAw9ZMDAABbSio+Zs6cKa/XGz/++Mc/6rTTTlNhYaEsy1JDQ4NqampUWlqqvLw8tbS06ODBg2ptbZ2o+QEAgM2M+TUfIyMjeuyxx7RixQo5HA719PQoEokoGAzGr3E6nSosLFRnZ+fnPs7w8LBisVjCAQAAJq+Msd7xqaee0kcffaQbbrhBkhSJRCRJHo8n4TqPx6O9e/d+7uPU1tbqjjvuGOsYQMrkrtuS7hGStmfD4nSPAABJG/POxx/+8AeVlJTI5/MlrDscjoTblmWNWvus6upqRaPR+NHX1zfWkQAAgA2Maedj7969ev755/Xkk0/G17xer6RPd0Bmz54dX+/v7x+1G/JZTqdTTqdzLGMAAAAbGtPOR3Nzs2bNmqXFi/9vy9fv98vr9cb/Akb69HUhHR0dKigoGP+kAABgUkh65+Pw4cNqbm5WeXm5MjL+7+4Oh0OVlZUKh8MKBAIKBAIKh8PKyspSWVlZSocGAAD2lXR8PP/88+rt7dWKFStGnauqqtLQ0JAqKio0MDCg/Px8tbW1yeVypWRYAABgf0nHRzAYlGVZRz3ncDgUCoUUCoXGOxcAAJik+GwXAABgFPEBAACMIj4AAIBRxAcAADCK+AAAAEYRHwAAwCjiAwAAGEV8AAAAo4gPAABgFPEBAACMSvrt1YGvInfdlnSPAAA4RrHzAQAAjCI+AACAUcQHAAAwivgAAABGER8AAMAo4gMAABhFfAAAAKOIDwAAYBTxAQAAjCI+AACAUcQHAAAwivgAAABGER8AAMAo4gMAABhFfAAAAKOIDwAAYBTxAQAAjCI+AACAUcQHAAAwivgAAABGER8AAMCopOPj3Xff1Q9/+EOdcsopysrK0je+8Q3t3Lkzft6yLIVCIfl8PmVmZqqoqEjd3d0pHRoAANhXUvExMDCgiy++WF/72tf07LPP6rXXXtO9996rk08+OX5NXV2d6uvr1djYqK6uLnm9XhUXF2twcDDVswMAABvKSObiu+66Szk5OWpubo6v5ebmxv/Zsiw1NDSopqZGpaWlkqSWlhZ5PB61trZq5cqVqZkaAADYVlI7H88884wWLlyo5cuXa9asWbrgggv08MMPx8/39PQoEokoGAzG15xOpwoLC9XZ2XnUxxweHlYsFks4AADA5JVUfLzzzjtqampSIBDQc889p1WrVuknP/mJHn30UUlSJBKRJHk8noT7eTye+Ln/VVtbK7fbHT9ycnLG8n0AAACbSCo+Dh8+rAsvvFDhcFgXXHCBVq5cqR//+MdqampKuM7hcCTctixr1NoR1dXVikaj8aOvry/JbwEAANhJUvExe/ZsnXPOOQlrZ599tnp7eyVJXq9XkkbtcvT394/aDTnC6XQqOzs74QAAAJNXUvFx8cUX64033khYe/PNNzVv3jxJkt/vl9frVXt7e/z8yMiIOjo6VFBQkIJxAQCA3SX11y4/+9nPVFBQoHA4rO9///v6+9//ro0bN2rjxo2SPv11S2VlpcLhsAKBgAKBgMLhsLKyslRWVjYh3wAAALCXpOLjoosu0ubNm1VdXa0777xTfr9fDQ0Nuu666+LXVFVVaWhoSBUVFRoYGFB+fr7a2trkcrlSPjwAALCfpOJDkq6++mpdffXVn3ve4XAoFAopFAqNZy4AADBJ8dkuAADAKOIDAAAYRXwAAACjiA8AAGAU8QEAAIwiPgAAgFHEBwAAMIr4AAAARhEfAADAKOIDAAAYlfTbqwM4duSu25LuEQAgaex8AAAAo4gPAABgFPEBAACMIj4AAIBRxAcAADCK+AAAAEYRHwAAwCjiAwAAGEV8AAAAo4gPAABgFPEBAACMIj4AAIBRxAcAADCK+AAAAEYRHwAAwCjiAwAAGEV8AAAAo4gPAABgFPEBAACMIj4AAIBRScVHKBSSw+FIOLxeb/y8ZVkKhULy+XzKzMxUUVGRuru7Uz40AACwr6R3Ps4991zt378/fuzevTt+rq6uTvX19WpsbFRXV5e8Xq+Ki4s1ODiY0qEBAIB9JR0fGRkZ8nq98WPmzJmSPt31aGhoUE1NjUpLS5WXl6eWlhYdPHhQra2tKR8cAADYU9Lx8dZbb8nn88nv9+uaa67RO++8I0nq6elRJBJRMBiMX+t0OlVYWKjOzs7Pfbzh4WHFYrGEAwAATF5JxUd+fr4effRRPffcc3r44YcViURUUFCgDz74QJFIRJLk8XgS7uPxeOLnjqa2tlZutzt+5OTkjOHbAAAAdpFUfJSUlOi73/2uzjvvPF155ZXasmWLJKmlpSV+jcPhSLiPZVmj1j6rurpa0Wg0fvT19SUzEgAAsJlx/antSSedpPPOO09vvfVW/K9e/neXo7+/f9RuyGc5nU5lZ2cnHAAAYPIaV3wMDw/r9ddf1+zZs+X3++X1etXe3h4/PzIyoo6ODhUUFIx7UAAAMDlkJHPxz3/+cy1ZskRz585Vf3+/fvOb3ygWi6m8vFwOh0OVlZUKh8MKBAIKBAIKh8PKyspSWVnZRM0PAABsJqn4+Pe//61rr71W77//vmbOnKlvfvOb2r59u+bNmydJqqqq0tDQkCoqKjQwMKD8/Hy1tbXJ5XJNyPAAAMB+HJZlWeke4rNisZjcbrei0Siv//j/ctdtSfcIwHFvz4bF6R4BOKYl8/Obz3YBAABGER8AAMAo4gMAABhFfAAAAKOIDwAAYBTxAQAAjCI+AACAUcQHAAAwivgAAABGER8AAMAo4gMAABhFfAAAAKOIDwAAYBTxAQAAjCI+AACAUcQHAAAwivgAAABGER8AAMAo4gMAABiVke4BAMAOctdtSfcISduzYXG6RwCOip0PAABgFPEBAACMIj4AAIBRxAcAADCK+AAAAEYRHwAAwCjiAwAAGEV8AAAAo4gPAABgFPEBAACMIj4AAIBR44qP2tpaORwOVVZWxtcsy1IoFJLP51NmZqaKiorU3d093jkBAMAkMeb46Orq0saNGzV//vyE9bq6OtXX16uxsVFdXV3yer0qLi7W4ODguIcFAAD2N6b4+Pjjj3Xdddfp4Ycf1te//vX4umVZamhoUE1NjUpLS5WXl6eWlhYdPHhQra2tKRsaAADY15jiY/Xq1Vq8eLGuvPLKhPWenh5FIhEFg8H4mtPpVGFhoTo7O8c3KQAAmBQykr3Dpk2b9PLLL6urq2vUuUgkIknyeDwJ6x6PR3v37j3q4w0PD2t4eDh+OxaLJTsSAACwkaR2Pvr6+vTTn/5Ujz32mE488cTPvc7hcCTctixr1NoRtbW1crvd8SMnJyeZkQAAgM0kFR87d+5Uf3+/FixYoIyMDGVkZKijo0O/+93vlJGREd/xOLIDckR/f/+o3ZAjqqurFY1G40dfX98YvxUAAGAHSf3a5YorrtDu3bsT1n70ox/prLPO0q233qpTTz1VXq9X7e3tuuCCCyRJIyMj6ujo0F133XXUx3Q6nXI6nWMcHwAA2E1S8eFyuZSXl5ewdtJJJ+mUU06Jr1dWViocDisQCCgQCCgcDisrK0tlZWWpmxoAANhW0i84/TJVVVUaGhpSRUWFBgYGlJ+fr7a2NrlcrlR/KQAAYEMOy7KsdA/xWbFYTG63W9FoVNnZ2eke55iQu25LukcAYEN7NixO9wg4jiTz85vPdgEAAEYRHwAAwCjiAwAAGEV8AAAAo4gPAABgFPEBAACMIj4AAIBRxAcAADCK+AAAAEYRHwAAwCjiAwAAGEV8AAAAo4gPAABgFPEBAACMIj4AAIBRxAcAADCK+AAAAEYRHwAAwCjiAwAAGEV8AAAAo4gPAABgFPEBAACMIj4AAIBRGekewLTcdVvSPQIAAMc1dj4AAIBRxAcAADCK+AAAAEYRHwAAwCjiAwAAGEV8AAAAo4gPAABgFPEBAACMSio+mpqaNH/+fGVnZys7O1uLFi3Ss88+Gz9vWZZCoZB8Pp8yMzNVVFSk7u7ulA8NAADsK6n4mDNnjjZs2KAdO3Zox44duvzyy7V06dJ4YNTV1am+vl6NjY3q6uqS1+tVcXGxBgcHJ2R4AABgP0nFx5IlS3TVVVfpjDPO0BlnnKH169dr2rRp2r59uyzLUkNDg2pqalRaWqq8vDy1tLTo4MGDam1tnaj5AQCAzYz5NR+HDh3Spk2bdODAAS1atEg9PT2KRCIKBoPxa5xOpwoLC9XZ2fm5jzM8PKxYLJZwAACAySvp+Ni9e7emTZsmp9OpVatWafPmzTrnnHMUiUQkSR6PJ+F6j8cTP3c0tbW1crvd8SMnJyfZkQAAgI0kHR9nnnmmdu3ape3bt+vmm29WeXm5Xnvttfh5h8ORcL1lWaPWPqu6ulrRaDR+9PX1JTsSAACwkYxk7zB16lSdfvrpkqSFCxeqq6tL9913n2699VZJUiQS0ezZs+PX9/f3j9oN+Syn0ymn05nsGAAAwKbG/T4flmVpeHhYfr9fXq9X7e3t8XMjIyPq6OhQQUHBeL8MAACYJJLa+bjttttUUlKinJwcDQ4OatOmTXrxxRe1detWORwOVVZWKhwOKxAIKBAIKBwOKysrS2VlZRM1PwAAsJmk4uM///mPrr/+eu3fv19ut1vz58/X1q1bVVxcLEmqqqrS0NCQKioqNDAwoPz8fLW1tcnlck3I8AAAwH4clmVZ6R7is2KxmNxut6LRqLKzs1P++LnrtqT8MQHgWLRnw+J0j4DjSDI/v/lsFwAAYBTxAQAAjCI+AACAUcQHAAAwivgAAABGER8AAMAo4gMAABhFfAAAAKOIDwAAYBTxAQAAjCI+AACAUcQHAAAwivgAAABGER8AAMCojHQPAACYGLnrtqR7hKTt2bA43SPAAHY+AACAUcQHAAAwivgAAABGER8AAMAo4gMAABhFfAAAAKOIDwAAYBTxAQAAjCI+AACAUcQHAAAwivgAAABGER8AAMAo4gMAABhFfAAAAKOIDwAAYBTxAQAAjEoqPmpra3XRRRfJ5XJp1qxZWrZsmd54442EayzLUigUks/nU2ZmpoqKitTd3Z3SoQEAgH0lFR8dHR1avXq1tm/frvb2dn3yyScKBoM6cOBA/Jq6ujrV19ersbFRXV1d8nq9Ki4u1uDgYMqHBwAA9pORzMVbt25NuN3c3KxZs2Zp586d+ta3viXLstTQ0KCamhqVlpZKklpaWuTxeNTa2qqVK1embnIAAGBL43rNRzQalSRNnz5dktTT06NIJKJgMBi/xul0qrCwUJ2dnUd9jOHhYcVisYQDAABMXmOOD8uytHbtWl1yySXKy8uTJEUiEUmSx+NJuNbj8cTP/a/a2lq53e74kZOTM9aRAACADYw5PtasWaNXX31Vjz/++KhzDocj4bZlWaPWjqiurlY0Go0ffX19Yx0JAADYQFKv+Tjilltu0TPPPKNt27Zpzpw58XWv1yvp0x2Q2bNnx9f7+/tH7YYc4XQ65XQ6xzIGAACwoaR2PizL0po1a/Tkk0/qL3/5i/x+f8J5v98vr9er9vb2+NrIyIg6OjpUUFCQmokBAICtJbXzsXr1arW2turpp5+Wy+WKv47D7XYrMzNTDodDlZWVCofDCgQCCgQCCofDysrKUllZ2YR8AwAAwF6Sio+mpiZJUlFRUcJ6c3OzbrjhBklSVVWVhoaGVFFRoYGBAeXn56utrU0ulyslAwMAAHtLKj4sy/rSaxwOh0KhkEKh0FhnAgAAkxif7QIAAIwiPgAAgFHEBwAAMIr4AAAARhEfAADAKOIDAAAYRXwAAACjiA8AAGAU8QEAAIwiPgAAgFHEBwAAMIr4AAAARhEfAADAKOIDAAAYRXwAAACjiA8AAGAU8QEAAIwiPgAAgFHEBwAAMIr4AAAARhEfAADAKOIDAAAYRXwAAACjMtI9AAAAR+Su25LuEZK2Z8PidI9gO+x8AAAAo4gPAABgFPEBAACMIj4AAIBRxAcAADCK+AAAAEYRHwAAwCjiAwAAGJV0fGzbtk1LliyRz+eTw+HQU089lXDesiyFQiH5fD5lZmaqqKhI3d3dqZoXAADYXNLxceDAAZ1//vlqbGw86vm6ujrV19ersbFRXV1d8nq9Ki4u1uDg4LiHBQAA9pf026uXlJSopKTkqOcsy1JDQ4NqampUWloqSWppaZHH41Fra6tWrlw5vmkBAIDtpfQ1Hz09PYpEIgoGg/E1p9OpwsJCdXZ2HvU+w8PDisViCQcAAJi8UhofkUhEkuTxeBLWPR5P/Nz/qq2tldvtjh85OTmpHAkAABxjJuSvXRwOR8Jty7JGrR1RXV2taDQaP/r6+iZiJAAAcIxI+jUfX8Tr9Ur6dAdk9uzZ8fX+/v5RuyFHOJ1OOZ3OVI4BAACOYSnd+fD7/fJ6vWpvb4+vjYyMqKOjQwUFBan8UgAAwKaS3vn4+OOP9fbbb8dv9/T0aNeuXZo+fbrmzp2ryspKhcNhBQIBBQIBhcNhZWVlqaysLKWDAwAAe0o6Pnbs2KHLLrssfnvt2rWSpPLycj3yyCOqqqrS0NCQKioqNDAwoPz8fLW1tcnlcqVuagAAYFsOy7KsdA/xWbFYTG63W9FoVNnZ2Sl//Nx1W1L+mACA49eeDYvTPcIxIZmf33y2CwAAMIr4AAAARhEfAADAKOIDAAAYRXwAAACjiA8AAGBUSt9eHQCA440d38Ih3X8ezM4HAAAwivgAAABGER8AAMAo4gMAABhFfAAAAKOIDwAAYBTxAQAAjCI+AACAUcQHAAAwivgAAABGER8AAMAo4gMAABhFfAAAAKOIDwAAYBTxAQAAjCI+AACAUcQHAAAwivgAAABGER8AAMAo4gMAABhFfAAAAKOIDwAAYBTxAQAAjCI+AACAURMWHw888ID8fr9OPPFELViwQC+99NJEfSkAAGAjExIfTzzxhCorK1VTU6NXXnlFl156qUpKStTb2zsRXw4AANjIhMRHfX29brzxRt100006++yz1dDQoJycHDU1NU3ElwMAADaSkeoHHBkZ0c6dO7Vu3bqE9WAwqM7OzlHXDw8Pa3h4OH47Go1KkmKxWKpHkyQdHj44IY8LAIBdTMTP2COPaVnWl16b8vh4//33dejQIXk8noR1j8ejSCQy6vra2lrdcccdo9ZzcnJSPRoAAJDkbpi4xx4cHJTb7f7Ca1IeH0c4HI6E25ZljVqTpOrqaq1duzZ++/Dhw/rwww91yimnHPX6Y00sFlNOTo76+vqUnZ2d7nFsh+dvfHj+xo7nbnx4/sZnMj5/lmVpcHBQPp/vS69NeXzMmDFDU6ZMGbXL0d/fP2o3RJKcTqecTmfC2sknn5zqsSZcdnb2pPkXKB14/saH52/seO7Gh+dvfCbb8/dlOx5HpPwFp1OnTtWCBQvU3t6esN7e3q6CgoJUfzkAAGAzE/Jrl7Vr1+r666/XwoULtWjRIm3cuFG9vb1atWrVRHw5AABgIxMSHz/4wQ/0wQcf6M4779T+/fuVl5enP/3pT5o3b95EfLm0cjqduv3220f96ghfDc/f+PD8jR3P3fjw/I3P8f78Oayv8jcxAAAAKcJnuwAAAKOIDwAAYBTxAQAAjCI+AACAUcRHiuzZs0c33nij/H6/MjMzddppp+n222/XyMhIukezjfXr16ugoEBZWVm2fKM50x544AH5/X6deOKJWrBggV566aV0j2Qb27Zt05IlS+Tz+eRwOPTUU0+leyTbqK2t1UUXXSSXy6VZs2Zp2bJleuONN9I9lm00NTVp/vz58TcXW7RokZ599tl0j2Uc8ZEi//znP3X48GE99NBD6u7u1m9/+1s9+OCDuu2229I9mm2MjIxo+fLluvnmm9M9yjHviSeeUGVlpWpqavTKK6/o0ksvVUlJiXp7e9M9mi0cOHBA559/vhobG9M9iu10dHRo9erV2r59u9rb2/XJJ58oGAzqwIED6R7NFubMmaMNGzZox44d2rFjhy6//HItXbpU3d3d6R7NKP7UdgLdfffdampq0jvvvJPuUWzlkUceUWVlpT766KN0j3LMys/P14UXXqimpqb42tlnn61ly5aptrY2jZPZj8Ph0ObNm7Vs2bJ0j2JL7733nmbNmqWOjg5961vfSvc4tjR9+nTdfffduvHGG9M9ijHsfEygaDSq6dOnp3sMTDIjIyPauXOngsFgwnowGFRnZ2eapsLxKhqNShL/rxuDQ4cOadOmTTpw4IAWLVqU7nGMmrBPtT3e/etf/9L999+ve++9N92jYJJ5//33dejQoVEf1OjxeEZ9oCMwkSzL0tq1a3XJJZcoLy8v3ePYxu7du7Vo0SL997//1bRp07R582adc8456R7LKHY+vkQoFJLD4fjCY8eOHQn32bdvn7797W9r+fLluummm9I0+bFhLM8fvhqHw5Fw27KsUWvARFqzZo1effVVPf744+kexVbOPPNM7dq1S9u3b9fNN9+s8vJyvfbaa+keyyh2Pr7EmjVrdM0113zhNbm5ufF/3rdvny677LL4B+od75J9/vDlZsyYoSlTpoza5ejv7x+1GwJMlFtuuUXPPPOMtm3bpjlz5qR7HFuZOnWqTj/9dEnSwoUL1dXVpfvuu08PPfRQmiczh/j4EjNmzNCMGTO+0rXvvvuuLrvsMi1YsEDNzc064QQ2lpJ5/vDVTJ06VQsWLFB7e7u+853vxNfb29u1dOnSNE6G44FlWbrlllu0efNmvfjii/L7/ekeyfYsy9Lw8HC6xzCK+EiRffv2qaioSHPnztU999yj9957L37O6/WmcTL76O3t1Ycffqje3l4dOnRIu3btkiSdfvrpmjZtWnqHO8asXbtW119/vRYuXBjfZevt7dWqVavSPZotfPzxx3r77bfjt3t6erRr1y5Nnz5dc+fOTeNkx77Vq1ertbVVTz/9tFwuV3wHzu12KzMzM83THftuu+02lZSUKCcnR4ODg9q0aZNefPFFbd26Nd2jmWUhJZqbmy1JRz3w1ZSXlx/1+XvhhRfSPdox6fe//701b948a+rUqdaFF15odXR0pHsk23jhhReO+u9aeXl5ukc75n3e/+eam5vTPZotrFixIv7f7cyZM60rrrjCamtrS/dYxvE+HwAAwChelAAAAIwiPgAAgFHEBwAAMIr4AAAARhEfAADAKOIDAAAYRXwAAACjiA8AAGAU8QEAAIwiPgAAgFHEBwAAMIr4AAAARv0/4ypnD6k+vkUAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 640x480 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "plt.figure()\n",
    "plt.hist(tcga_time_sub['T.CD8'])\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "37b0a4eb",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "#merge all the table above\n",
    "tcga_all=pd.merge(tcga_clinical_sub,mhc,on='Patients',how='inner')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "b92dbc20",
   "metadata": {},
   "outputs": [],
   "source": [
    "tcga_all=pd.merge(tcga_all,tcga_pd,on='Patients',how='inner')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "c33a85a0",
   "metadata": {},
   "outputs": [],
   "source": [
    "tcga_all=pd.merge(tcga_all,tcga_treg_sub,on='Patients',how='inner')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "d061ddab",
   "metadata": {},
   "outputs": [],
   "source": [
    "tcga_all=pd.merge(tcga_all,tcga_time_sub,on='Patients',how='inner')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "96db94e5",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "T4b    111\n",
       "TX      47\n",
       "T3b     40\n",
       "T3a     39\n",
       "T2a     34\n",
       "T2      32\n",
       "'--     30\n",
       "T4a     28\n",
       "T0      23\n",
       "T1a     21\n",
       "T2b     15\n",
       "T4      15\n",
       "T3      14\n",
       "T1      10\n",
       "T1b     10\n",
       "Tis      8\n",
       "Name: ajcc_pathologic_t, dtype: int64"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#mapping tumor stage to tumor population\n",
    "tcga_all['ajcc_pathologic_t'].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "a28d012b",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# TX: Primary tumour cannot be assessed.\n",
    "# T0: No evidence of primary tumour.\n",
    "# Tis: Melanoma in situ (Clark level I – it remains in the epidermis).\n",
    "# T1a: The melanoma is less than or equal to 1.0 mm thick (1.0 mm = 1/25 or .04 inches), without ulceration and < 1 mitioses / mm2.\n",
    "# T1b: The melanoma is less than or equal to 1.0 mm thick, > 1 mitoses/mm2, and/or with ulceration.\n",
    "# T2a: The melanoma is between 1.01 and 2.0 mm thick without ulceration.\n",
    "# T2b: The melanoma is between 1.01 and 2.0 mm thick with ulceration.\n",
    "\n",
    "# T3a: The melanoma is between 2.01 and 4.0 mm thick without ulceration.\n",
    "# T3b: The melanoma is between 2.01 and 4.0 mm thick with ulceration.\n",
    "# T4a: The melanoma is thicker than 4.0 mm without ulceration.\n",
    "# T4b: The melanoma is thicker than 4.0 mm with ulceration\n",
    "\n",
    "#3.3e7(4mm),5e5(1mm),1.7e6(1.5mm),4e6(2mm),1.4e7(3mm),2.6e8(4mm)\n",
    "tumor_size={'T4b':3.3e7,'T3b':1.4e7,'T3a':1.4e7,'T2a':1.7e6,'T2':1.7e6,\n",
    "            'T4a':3.3e7,'T1a':5e5,'T4':3.3e7,'T2b':1.7e6,'T3':1.4e7,'T1':5e5,'T1b':5e5,\n",
    "           'TX':1e5,'T0':1e5,'Tis':1e5}\n",
    "\n",
    "tumor_size_l=[]\n",
    "for i in range(len(tcga_all)):\n",
    "    t_stage=tcga_all.iloc[i,2]\n",
    "    try:\n",
    "        tumor_s=tumor_size[t_stage]\n",
    "    except:\n",
    "        tumor_s=0\n",
    "    tumor_size_l.append(tumor_s)\n",
    "tcga_all['tumor_size']=tumor_size_l"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "49f8002a",
   "metadata": {},
   "outputs": [],
   "source": [
    "#calculate the ratio of Treg to CD8+T in cibersort result\n",
    "tcga_all['Treg_pro']=tcga_all['T cells regulatory (Tregs)']/tcga_all['T cells CD8']\n",
    "\n",
    "#deal with abnormal value of Treg_pro\n",
    "for i in range(len(tcga_all)):\n",
    "    Treg_pro=tcga_all.loc[i,'Treg_pro']\n",
    "    if Treg_pro>=0 and Treg_pro<100:\n",
    "        pass\n",
    "    else:\n",
    "        tcga_all.loc[i,'Treg_pro']=0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "86dee5b1",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "#calculate total MHC expression in ssGSEA result\n",
    "x=tcga_all['MHC-II HLA Geneset']+tcga_all['MHC-I HLA and APM Geneset']\n",
    "tcga_all['mhc']=tcga_all['MHC-II HLA Geneset']+tcga_all['MHC-I HLA and APM Geneset']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "430c8821",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "PearsonRResult(statistic=0.8088017367892889, pvalue=1.3735154151201202e-111)"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x=x.tolist()\n",
    "y=tcga_all['T.CD8']\n",
    "#correlation of MHC expression and CD8+T cells' scores\n",
    "r = np.corrcoef(x, y)\n",
    "scipy.stats.pearsonr(x, y) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "74b1be34",
   "metadata": {},
   "outputs": [],
   "source": [
    "#delete the samples without tumor stage record\n",
    "tcga_all=tcga_all[tcga_all['tumor_size']>0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "f205311f",
   "metadata": {},
   "outputs": [],
   "source": [
    "#Because the expression of PDCD1 is long tail,normalize it by np.log(x+1)/np.log(max(x)+1)\n",
    "tcga_all['PDCD1'].describe()\n",
    "tcga_all['mu']=np.log(tcga_all['PDCD1']+1)/8.83"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "6b0b13f7",
   "metadata": {},
   "outputs": [],
   "source": [
    "#normalize the MHC expression\n",
    "tcga_all['mhc_norm']=(tcga_all['mhc']+22245)/(22210+22245)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "74e6ea8c",
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "count    447.000000\n",
       "mean      -0.023598\n",
       "std        1.222055\n",
       "min       -2.231578\n",
       "25%       -1.054362\n",
       "50%       -0.099072\n",
       "75%        0.900045\n",
       "max        3.384689\n",
       "Name: T.CD8, dtype: float64"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#because the ratio of CD8+T cells to tumor cells is about 0-0.03, map the CD8+T score to CD8+T/tumor ratio\n",
    "tcga_all['T.CD8'].describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "8cb407ca",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.006239244799999999"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cd8t_std=0.0053\n",
    "-1.054362*cd8t_std+cd8t_std*2.231578"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "9b24c4b4",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.0053416263863523585\n"
     ]
    }
   ],
   "source": [
    "cd8t_std=0.03/(2.231578+3.384689)\n",
    "print(cd8t_std)\n",
    "cd8t_mean=cd8t_std*2.231578\n",
    "tcga_all['tcd8_pop']=(tcga_all['T.CD8']*cd8t_std+cd8t_mean)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 206,
   "id": "4ea7ecd0",
   "metadata": {},
   "outputs": [],
   "source": [
    "tcga_all['a']=np.random.uniform(1.45e-2,2.23e-2,len(tcga_all))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 198,
   "id": "dbe89734",
   "metadata": {},
   "outputs": [],
   "source": [
    "tcga_all=tcga_all[tcga_all['ajcc_pathologic_m']=='M0']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 207,
   "id": "f2e880f3",
   "metadata": {},
   "outputs": [],
   "source": [
    "tcga_all.to_csv('Preprocessed_data/tcga_all.csv')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
